#!/usr/bin/env python3
"""
===============================================================================
  65C02 Assembler  —  NWasm65.py
  Target CPU: WDC W65C02S (full instruction set including Rockwell extensions)
===============================================================================

  Written by Claude Opus 4.6
  Extended Prompts by J R Casey Bralla
  https://www.NerdWorld.org

  Version history
  ---------------
  v1.0.0  2026-03-16  Initial release — full 65C02 opcode table, two-pass
                      assembly, binary (.bin) and listing (.txt) output.
  v1.0.1  2026-03-16  Fixed listing format for data directives that emit
                      more than 3 bytes (continuation lines).
  v1.1.0  2026-03-16  Added: expression evaluator (+, -, *, <low, >high),
                      .text/.asciiz string directives, .ds/.fill memory fill,
                      .include file inclusion, decimal (!) and binary (&)
                      number prefixes, label trailing-colon support,
                      local labels (. or @ prefix), symbol table appended to
                      listing, improved error tracking with total count,
                      forward-ref .equ resolution.
  v1.2.0  2026-03-17  Symbol table now shows definition line number and
                      every line number where each symbol is referenced.
  v1.3.0  2026-03-18  Added CRC-16 checksum (16-bit sum of all binary bytes)
                      to listing summary and console output.
                      Removed bare (unprefixed) hexadecimal literal support —
                      all hex values now require a $ or 0x prefix.
  v1.4.0  2026-03-18  Added error list section to listing file, printed
                      between the symbol table and the assembly summary.
  v1.5.0  2026-03-18  .byte directive now accepts quoted strings (single or
                      double) as ASCII data, intermixed with numeric values.
                      Semicolons inside quoted strings are no longer treated
                      as comment delimiters.
                      e.g.  .byte "Hello; World",$0D,$0A,'Z',$00
  v1.6.0  2026-03-18  Bare decimal numbers now recognised in expressions,
                      enabling label arithmetic such as LABEL+1, TABLE+3,
                      ADDR-2 without needing the ! prefix on the numeric
                      operand.
  v1.7.0  2026-03-18  Quoted character literals now recognised in expressions
                      and operands: 'A' = $41, '>' = $3E.  Works everywhere
                      a numeric value is accepted, including LDA #'>',
                      CMP #'A', .byte ';'+1.  Single and double quotes
                      supported.  Escape sequences (\\n, \\t, \\0, \\\\) work
                      inside character literals.
  v1.8.0  2026-03-18  Asterisk (*) now serves as a placeholder for the
                      current program counter in expressions.
                      e.g.  BEQ *+3, JMP *, .word *
  v1.8.1  2026-03-18  Fixed bug where backslash-escaped characters inside
                      quoted strings (e.g. \") could cause the quote tracker
                      to close prematurely, leading subsequent semicolons to
                      be wrongly treated as comment delimiters.

  Usage
  -----
      python3 asm65c02.py <source-file>

  Produces:
      <name>.bin  — raw binary image (starting at .org address)
      <name>.txt  — human-readable listing with symbol table

  NOTE(review): the <...> placeholders above were missing from this copy of
  the header and have been reconstructed; confirm the exact command line,
  script name and output naming against the program's entry point.

  Input syntax  (see Example_Source.asm)
  -------------------------------------
  Labels start in column 1 (must begin with a letter, underscore, '.' or '@').
  An optional trailing colon is allowed: "MyLabel:" is the same as "MyLabel".
  Instructions are indented by at least one space or tab.
  Comments start with ';' and run to end of line.

  Local labels start with '.' or '@' (e.g. .loop, @retry).  They are scoped
  to the most recent non-local label.  Internally the key is the upper-cased
  parent, a '.', and the upper-cased local label including its own prefix
  (e.g. "MAIN..LOOP", "MAIN.@RETRY"), so local names can be reused under
  different parents.

  Addressing-mode prefixes
  ------------------------
    #value          immediate (8-bit value)
    %value          zero-page
    %value,X        zero-page, X-indexed
    %value,Y        zero-page, Y-indexed
    value           absolute (16-bit address)
    value,X         absolute, X-indexed
    value,Y         absolute, Y-indexed
    [value]         indirect
    [value,X]       pre-indexed indirect (zp,X)
    [value],Y       post-indexed indirect (zp),Y

  Numeric literals
  ----------------
    $FF             hexadecimal ($ prefix)
    0xFF            hexadecimal (0x prefix)
    !255            decimal (! prefix)
    &11111111       binary (& prefix)
    255             bare decimal (digits only, valid in expressions)
    'A'             character literal (ASCII value, single quotes)
    "Z"             character literal (ASCII value, double quotes)
    *               current program counter

  Expressions  (evaluated left-to-right, no precedence)
  -----------
    value+offset    addition       (e.g. LABEL+1, TABLE+$10)
    value-offset    subtraction    (e.g. END_ADDR-2)
    *+offset        PC-relative    (e.g. BEQ *+3, JMP *)
    value*factor    multiplication
    <value          low byte       (value & $FF)
    >value          high byte      ((value >> 8) & $FF)

  The < and > prefixes apply to the whole expression that follows them.

  Directives
  ----------
    .org  $addr         set the program-counter / origin
    .byte $v,...        emit literal byte(s); single- or double-quoted
                        strings are treated as ASCII data and may contain
                        semicolons, e.g.  .byte "Hi;there",$0D
    .word $v,...        emit literal 16-bit word(s), little-endian
    .text "string"      emit ASCII bytes for a quoted string
    .asciiz "string"    emit ASCII bytes followed by a $00 terminator
    .ds   count         reserve (skip) *count* bytes (filled with $00)
    .fill count,value   fill *count* bytes with *value*
    .equ  value         assign a constant to the preceding label
    .include "file"     include another source file at this point
===============================================================================
"""

import sys
import os
import re

# ---------------------------------------------------------------------------
# Version string — update with EVERY revision
# ---------------------------------------------------------------------------
VERSION = "1.8.1"

# ---------------------------------------------------------------------------
# Addressing-mode constants
# Each mode has a short tag used as dictionary keys throughout the code.
# ---------------------------------------------------------------------------
IMP = "IMP"  # Implied                (no operand)
ACC = "ACC"  # Accumulator            (operand is A register)
IMM = "IMM"  # Immediate              #nn
ZP = "ZP"    # Zero-page              %nn  or auto-detected < $100
ZPX = "ZPX"  # Zero-page, X           %nn,X
ZPY = "ZPY"  # Zero-page, Y           %nn,Y
ABS = "ABS"  # Absolute               nnnn
ABX = "ABX"  # Absolute, X            nnnn,X
ABY = "ABY"  # Absolute, Y            nnnn,Y
IND = "IND"  # Indirect               [nnnn]  (JMP only on NMOS; others=ZPI)
IDX = "IDX"  # (Zero-page, X)         [nn,X]
IDY = "IDY"  # (Zero-page), Y         [nn],Y  or [nn,Y]
ZPI = "ZPI"  # Zero-page indirect     [nn]    (65C02 only, non-JMP)
REL = "REL"  # Relative               branch target (8-bit signed offset)
ZPR = "ZPR"  # Zero-page + Relative   (BBR/BBS: zp, branch_target)
AIX = "AIX"  # Absolute indexed ind.  [nnnn,X]  (JMP only, 65C02)

# ---------------------------------------------------------------------------
# Instruction size (in bytes) for each addressing mode
# ---------------------------------------------------------------------------
MODE_SIZE = {
    IMP: 1, ACC: 1,
    IMM: 2, ZP: 2, ZPX: 2, ZPY: 2,
    ABS: 3, ABX: 3, ABY: 3,
    IND: 3, AIX: 3,
    IDX: 2, IDY: 2, ZPI: 2,
    REL: 2,
    ZPR: 3,
}

# ---------------------------------------------------------------------------
# Complete W65C02S opcode table
#   Key   = (MNEMONIC, addressing_mode)     Value = opcode byte
#
# Sources: WDC W65C02S data sheet, "Programming the 65816" (Eyes/Lichty).
# Rockwell bit-manipulation (RMBn, SMBn, BBRn, BBSn) and WDC extras
# (WAI, STP) are included for the full W65C02S.
# ---------------------------------------------------------------------------
OPCODES = {
    # --- ADC ---
    ("ADC", IMM): 0x69, ("ADC", ZP): 0x65, ("ADC", ZPX): 0x75,
    ("ADC", ABS): 0x6D, ("ADC", ABX): 0x7D, ("ADC", ABY): 0x79,
    ("ADC", IDX): 0x61, ("ADC", IDY): 0x71, ("ADC", ZPI): 0x72,

    # --- AND ---
    ("AND", IMM): 0x29, ("AND", ZP): 0x25, ("AND", ZPX): 0x35,
    ("AND", ABS): 0x2D, ("AND", ABX): 0x3D, ("AND", ABY): 0x39,
    ("AND", IDX): 0x21, ("AND", IDY): 0x31, ("AND", ZPI): 0x32,

    # --- ASL ---
    ("ASL", ACC): 0x0A, ("ASL", ZP): 0x06, ("ASL", ZPX): 0x16,
    ("ASL", ABS): 0x0E, ("ASL", ABX): 0x1E,

    # --- BIT ---
    ("BIT", IMM): 0x89,                      # 65C02
    ("BIT", ZP): 0x24, ("BIT", ZPX): 0x34,   # ZPX is 65C02
    ("BIT", ABS): 0x2C, ("BIT", ABX): 0x3C,  # ABX is 65C02

    # --- Branch instructions (all relative) ---
    ("BCC", REL): 0x90, ("BCS", REL): 0xB0, ("BEQ", REL): 0xF0,
    ("BMI", REL): 0x30, ("BNE", REL): 0xD0, ("BPL", REL): 0x10,
    ("BRA", REL): 0x80,                      # 65C02
    ("BVC", REL): 0x50, ("BVS", REL): 0x70,

    # --- BRK ---
    ("BRK", IMP): 0x00,

    # --- CLC / CLD / CLI / CLV ---
    ("CLC", IMP): 0x18, ("CLD", IMP): 0xD8,
    ("CLI", IMP): 0x58, ("CLV", IMP): 0xB8,

    # --- CMP ---
    ("CMP", IMM): 0xC9, ("CMP", ZP): 0xC5, ("CMP", ZPX): 0xD5,
    ("CMP", ABS): 0xCD, ("CMP", ABX): 0xDD, ("CMP", ABY): 0xD9,
    ("CMP", IDX): 0xC1, ("CMP", IDY): 0xD1, ("CMP", ZPI): 0xD2,

    # --- CPX ---
    ("CPX", IMM): 0xE0, ("CPX", ZP): 0xE4, ("CPX", ABS): 0xEC,

    # --- CPY ---
    ("CPY", IMM): 0xC0, ("CPY", ZP): 0xC4, ("CPY", ABS): 0xCC,

    # --- DEC ---
    ("DEC", ACC): 0x3A,                      # 65C02 (DEC A)
    ("DEC", ZP): 0xC6, ("DEC", ZPX): 0xD6,
    ("DEC", ABS): 0xCE, ("DEC", ABX): 0xDE,

    # --- DEX / DEY ---
    ("DEX", IMP): 0xCA, ("DEY", IMP): 0x88,

    # --- EOR ---
    ("EOR", IMM): 0x49, ("EOR", ZP): 0x45, ("EOR", ZPX): 0x55,
    ("EOR", ABS): 0x4D, ("EOR", ABX): 0x5D, ("EOR", ABY): 0x59,
    ("EOR", IDX): 0x41, ("EOR", IDY): 0x51, ("EOR", ZPI): 0x52,

    # --- INC ---
    ("INC", ACC): 0x1A,                      # 65C02 (INC A)
    ("INC", ZP): 0xE6, ("INC", ZPX): 0xF6,
    ("INC", ABS): 0xEE, ("INC", ABX): 0xFE,

    # --- INX / INY ---
    ("INX", IMP): 0xE8, ("INY", IMP): 0xC8,

    # --- JMP ---
    ("JMP", ABS): 0x4C,
    ("JMP", IND): 0x6C,  # (abs)   — indirect
    ("JMP", AIX): 0x7C,  # (abs,X) — absolute indexed indirect, 65C02

    # --- JSR ---
    ("JSR", ABS): 0x20,

    # --- LDA ---
    ("LDA", IMM): 0xA9, ("LDA", ZP): 0xA5, ("LDA", ZPX): 0xB5,
    ("LDA", ABS): 0xAD, ("LDA", ABX): 0xBD, ("LDA", ABY): 0xB9,
    ("LDA", IDX): 0xA1, ("LDA", IDY): 0xB1, ("LDA", ZPI): 0xB2,

    # --- LDX ---
    ("LDX", IMM): 0xA2, ("LDX", ZP): 0xA6, ("LDX", ZPY): 0xB6,
    ("LDX", ABS): 0xAE, ("LDX", ABY): 0xBE,

    # --- LDY ---
    ("LDY", IMM): 0xA0, ("LDY", ZP): 0xA4, ("LDY", ZPX): 0xB4,
    ("LDY", ABS): 0xAC, ("LDY", ABX): 0xBC,

    # --- LSR ---
    ("LSR", ACC): 0x4A, ("LSR", ZP): 0x46, ("LSR", ZPX): 0x56,
    ("LSR", ABS): 0x4E, ("LSR", ABX): 0x5E,

    # --- NOP ---
    ("NOP", IMP): 0xEA,

    # --- ORA ---
    ("ORA", IMM): 0x09, ("ORA", ZP): 0x05, ("ORA", ZPX): 0x15,
    ("ORA", ABS): 0x0D, ("ORA", ABX): 0x1D, ("ORA", ABY): 0x19,
    ("ORA", IDX): 0x01, ("ORA", IDY): 0x11, ("ORA", ZPI): 0x12,

    # --- Stack instructions ---
    ("PHA", IMP): 0x48, ("PHP", IMP): 0x08,
    ("PHX", IMP): 0xDA, ("PHY", IMP): 0x5A,  # 65C02
    ("PLA", IMP): 0x68, ("PLP", IMP): 0x28,
    ("PLX", IMP): 0xFA, ("PLY", IMP): 0x7A,  # 65C02

    # --- ROL ---
    ("ROL", ACC): 0x2A, ("ROL", ZP): 0x26, ("ROL", ZPX): 0x36,
    ("ROL", ABS): 0x2E, ("ROL", ABX): 0x3E,

    # --- ROR ---
    ("ROR", ACC): 0x6A, ("ROR", ZP): 0x66, ("ROR", ZPX): 0x76,
    ("ROR", ABS): 0x6E, ("ROR", ABX): 0x7E,

    # --- RTI / RTS ---
    ("RTI", IMP): 0x40, ("RTS", IMP): 0x60,

    # --- SBC ---
    ("SBC", IMM): 0xE9, ("SBC", ZP): 0xE5, ("SBC", ZPX): 0xF5,
    ("SBC", ABS): 0xED, ("SBC", ABX): 0xFD, ("SBC", ABY): 0xF9,
    ("SBC", IDX): 0xE1, ("SBC", IDY): 0xF1, ("SBC", ZPI): 0xF2,

    # --- SEC / SED / SEI ---
    ("SEC", IMP): 0x38, ("SED", IMP): 0xF8, ("SEI", IMP): 0x78,

    # --- STA ---
    ("STA", ZP): 0x85, ("STA", ZPX): 0x95,
    ("STA", ABS): 0x8D, ("STA", ABX): 0x9D, ("STA", ABY): 0x99,
    ("STA", IDX): 0x81, ("STA", IDY): 0x91, ("STA", ZPI): 0x92,

    # --- STX ---
    ("STX", ZP): 0x86, ("STX", ZPY): 0x96, ("STX", ABS): 0x8E,

    # --- STY ---
    ("STY", ZP): 0x84, ("STY", ZPX): 0x94, ("STY", ABS): 0x8C,

    # --- STZ (65C02) ---
    ("STZ", ZP): 0x64, ("STZ", ZPX): 0x74,
    ("STZ", ABS): 0x9C, ("STZ", ABX): 0x9E,

    # --- Transfer instructions ---
    ("TAX", IMP): 0xAA, ("TAY", IMP): 0xA8,
    ("TSX", IMP): 0xBA, ("TXA", IMP): 0x8A,
    ("TXS", IMP): 0x9A, ("TYA", IMP): 0x98,

    # --- TRB / TSB (65C02) ---
    ("TRB", ZP): 0x14, ("TRB", ABS): 0x1C,
    ("TSB", ZP): 0x04, ("TSB", ABS): 0x0C,

    # --- WAI / STP (WDC 65C02) ---
    ("WAI", IMP): 0xCB, ("STP", IMP): 0xDB,

    # --- RMB0-RMB7 (Rockwell / WDC — reset memory bit) ---
    ("RMB0", ZP): 0x07, ("RMB1", ZP): 0x17,
    ("RMB2", ZP): 0x27, ("RMB3", ZP): 0x37,
    ("RMB4", ZP): 0x47, ("RMB5", ZP): 0x57,
    ("RMB6", ZP): 0x67, ("RMB7", ZP): 0x77,

    # --- SMB0-SMB7 (Rockwell / WDC — set memory bit) ---
    ("SMB0", ZP): 0x87, ("SMB1", ZP): 0x97,
    ("SMB2", ZP): 0xA7, ("SMB3", ZP): 0xB7,
    ("SMB4", ZP): 0xC7, ("SMB5", ZP): 0xD7,
    ("SMB6", ZP): 0xE7, ("SMB7", ZP): 0xF7,

    # --- BBR0-BBR7 (Rockwell / WDC — branch on bit reset) ---
    ("BBR0", ZPR): 0x0F, ("BBR1", ZPR): 0x1F,
    ("BBR2", ZPR): 0x2F, ("BBR3", ZPR): 0x3F,
    ("BBR4", ZPR): 0x4F, ("BBR5", ZPR): 0x5F,
    ("BBR6", ZPR): 0x6F, ("BBR7", ZPR): 0x7F,

    # --- BBS0-BBS7 (Rockwell / WDC — branch on bit set) ---
    ("BBS0", ZPR): 0x8F, ("BBS1", ZPR): 0x9F,
    ("BBS2", ZPR): 0xAF, ("BBS3", ZPR): 0xBF,
    ("BBS4", ZPR): 0xCF, ("BBS5", ZPR): 0xDF,
    ("BBS6", ZPR): 0xEF, ("BBS7", ZPR): 0xFF,
}

# ---------------------------------------------------------------------------
# Set of mnemonics that are branch instructions (use relative addressing)
# ---------------------------------------------------------------------------
BRANCH_MNEMONICS = {
    "BCC", "BCS", "BEQ", "BMI", "BNE", "BPL", "BRA", "BVC", "BVS",
}

# ---------------------------------------------------------------------------
# Set of mnemonics that use zero-page-plus-relative (ZPR) addressing
#   BBRn / BBSn — operand format:  zp_addr, branch_target
# ---------------------------------------------------------------------------
ZPR_MNEMONICS = {
    "BBR0", "BBR1", "BBR2", "BBR3", "BBR4", "BBR5", "BBR6", "BBR7",
    "BBS0", "BBS1", "BBS2", "BBS3", "BBS4", "BBS5", "BBS6", "BBS7",
}

# ---------------------------------------------------------------------------
# Instructions that support accumulator addressing (operand "A" or none)
# ---------------------------------------------------------------------------
ACC_MNEMONICS = {"ASL", "LSR", "ROL", "ROR", "INC", "DEC"}

# ---------------------------------------------------------------------------
# All known assembler directives
# ---------------------------------------------------------------------------
DIRECTIVES = {
    ".ORG", ".BYTE", ".WORD", ".EQU",
    ".TEXT", ".ASCIIZ", ".DS", ".FILL", ".INCLUDE",
}


# ===================================================================
# Expression evaluator
# ===================================================================

# Escape sequences recognised inside a quoted character literal, keyed by
# the character that follows the backslash.
_CHAR_ESCAPES = {
    "n": "\n",
    "t": "\t",
    "0": "\x00",
    "\\": "\\",
    "'": "'",
    '"': '"',
}


def _decode_char_escapes(text):
    """
    Decode backslash escapes in *text* in one left-to-right pass.

    A single pass is required for correctness: chained str.replace() calls
    turn an escaped backslash followed by 'n' into backslash + newline.
    Unrecognised escapes are left untouched (backslash and character kept).
    """
    out = []
    i = 0
    while i < len(text):
        ch = text[i]
        if ch == "\\" and i + 1 < len(text) and text[i + 1] in _CHAR_ESCAPES:
            out.append(_CHAR_ESCAPES[text[i + 1]])
            i += 2
        else:
            out.append(ch)
            i += 1
    return "".join(out)


def parse_value(token, symbols, pc=None):
    """
    Parse a numeric value or expression from *token*.

    Supports:
        $FF           — hexadecimal ($ prefix)
        0xFF          — hexadecimal (0x prefix)
        !255          — decimal (! prefix)
        &11111111     — binary (& prefix)
        255           — bare decimal (digits only, in expressions)
        'A'           — character literal (ASCII value, single quotes)
        "Z"           — character literal (ASCII value, double quotes)
        *             — current program counter
        Label         — symbol lookup

    Also supports simple expressions with +, -, * operators and the
    < (low-byte) and > (high-byte) unary prefix operators.  A prefix
    operator applies to the whole expression that follows it.  Binary
    operators are evaluated strictly left-to-right (no precedence).
    Whitespace around atoms and operators is ignored, and an atom may
    carry a leading sign (e.g. -1).

    Examples:  LABEL+1, TABLE+$10, ADDR-2, >VECTOR+1, #'>', *+3

    Parameters
    ----------
    token   : str       — expression text
    symbols : dict      — {UPPERCASE_NAME: int} symbol table
    pc      : int|None  — value substituted for '*'; None makes '*' an error

    Returns an integer (not range-limited; callers mask as needed).
    Raises ValueError for empty, malformed or unresolvable expressions.
    """
    if token:
        token = token.strip()
    if not token:
        raise ValueError("Empty value token")

    # --- < (low byte) and > (high byte) apply to everything after them ---
    if token.startswith("<"):
        return parse_value(token[1:], symbols, pc) & 0xFF
    if token.startswith(">"):
        return (parse_value(token[1:], symbols, pc) >> 8) & 0xFF

    parts, ops = _tokenise_expression(token)
    if not parts:
        raise ValueError(f"Empty expression: '{token}'")
    # A trailing operator ("LABEL+") yields as many operators as atoms.
    # Report it as a ValueError so callers that catch ValueError (pass 1)
    # do not crash with an IndexError.
    if len(parts) != len(ops) + 1:
        raise ValueError(f"Expression ends with an operator: '{token}'")

    result = _parse_atom(parts[0], symbols, pc)
    for op, part in zip(ops, parts[1:]):
        right = _parse_atom(part, symbols, pc)
        if op == "+":
            result += right
        elif op == "-":
            result -= right
        elif op == "*":
            result *= right
        else:
            raise ValueError(f"Unknown operator '{op}' in expression")
    return result


def _tokenise_expression(expr):
    """
    Split an expression string into (atoms, operators).

    Example:  "$1000+$20"   ->  (["$1000", "$20"], ["+"])
              "Label - !2"  ->  (["Label", "!2"], ["-"])
              "'>'+'A'"     ->  (["'>'", "'A'"], ["+"])

    The operators recognised are + - *.  A +, - or * that appears where an
    atom is expected (nothing but whitespace accumulated so far) is kept as
    part of the atom: this is how a leading sign and the '*' program-counter
    placeholder survive.  Quoted character literals ('X' or "X") are copied
    intact so that characters like '+', '-', '*' inside them are not
    mistaken for operators.  Atoms are returned with surrounding whitespace
    removed.
    """
    atoms = []
    ops = []
    current = ""
    i = 0
    n = len(expr)
    while i < n:
        ch = expr[i]

        # --- Quoted character literal: copy through the closing quote ---
        if ch in ('"', "'"):
            start = i
            i += 1
            while i < n and expr[i] != ch:
                # A backslash protects the following character.
                i += 2 if (expr[i] == "\\" and i + 1 < n) else 1
            if i < n:
                i += 1  # include the closing quote
            current += expr[start:i]
            continue

        if ch in "+-*" and current.strip():
            atoms.append(current.strip())
            ops.append(ch)
            current = ""
        else:
            current += ch
        i += 1

    if current.strip():
        atoms.append(current.strip())
    return atoms, ops


def _parse_atom(token, symbols, pc=None):
    """
    Parse a single atomic value (no binary operators).

    Resolution order:
        1. *  (current program counter)
        2. leading + or - sign applied to the atom that follows
        3. $hex
        4. 0xhex
        5. !decimal
        6. &binary
        7. Quoted character literal ('A', ">", '\\n', '\\'')
        8. Symbol/label lookup (case-insensitive)
        9. Bare decimal (all-digit token, e.g. "1", "255")

    Raises ValueError if the token matches none of these.
    """
    if token:
        token = token.strip()
    if not token:
        raise ValueError("Empty atom in expression")

    # --- * = current program counter ---
    if token == "*":
        if pc is not None:
            return pc
        raise ValueError("'*' (program counter) not available in this context")

    # --- Unary sign: the tokeniser leaves it attached to the atom ---
    if token[0] in "+-" and len(token) > 1:
        magnitude = _parse_atom(token[1:], symbols, pc)
        return -magnitude if token[0] == "-" else magnitude

    # --- Prefixed numeric literals ---
    if token.startswith("$"):
        return int(token[1:], 16)
    if token.lower().startswith("0x"):
        return int(token, 16)
    if token.startswith("!"):
        return int(token[1:], 10)
    if token.startswith("&"):
        return int(token[1:], 2)

    # --- Quoted character literal: 'X' or "X" -> character code ---
    if len(token) >= 3 and token[0] in ('"', "'") and token[-1] == token[0]:
        content = _decode_char_escapes(token[1:-1])
        if len(content) == 1:
            return ord(content)
        raise ValueError(
            f"Character literal must be a single character: {token}"
        )

    # --- Symbol/label lookup (table keys are upper-case) ---
    upper = token.upper()
    if upper in symbols:
        return symbols[upper]

    # --- Bare decimal (all digits) ---
    if token.isdigit():
        return int(token, 10)

    raise ValueError(f"Unknown symbol or bad number: '{token}'")


# ===================================================================
# Helper: detect addressing mode and extract operand value(s)
# ===================================================================

def _relative_offset(mnemonic, target, next_pc):
    """
    Return the 8-bit two's-complement branch offset from *next_pc* (the
    address of the byte after the instruction) to *target*.

    Raises ValueError when the distance does not fit in a signed byte.
    """
    offset = target - next_pc
    if offset < -128 or offset > 127:
        raise ValueError(
            f"Branch target out of range for {mnemonic} "
            f"(offset {offset})"
        )
    return offset & 0xFF


def _split_indexed(text):
    """
    Split "expr,REG" into (expr, REG_UPPER).  Without a comma the result
    is (text, None).  Only the first two comma-separated fields are used.
    """
    if "," not in text:
        return text, None
    fields = [p.strip() for p in text.split(",")]
    return fields[0], fields[1].upper()


def parse_operand(mnemonic, operand_str, symbols, pc):
    """
    Analyse the operand string and return:
        (addressing_mode, [byte_values])

    *byte_values* is a list of the operand bytes that follow the opcode
    (empty for IMP/ACC, one element for ZP/IMM/REL, two for ABS...).
    16-bit values are emitted low byte first.

    Parameters
    ----------
    mnemonic   : str  — uppercase mnemonic (e.g. "LDA")
    operand_str: str  — raw operand text (e.g. "#$0A", "%20,X", "[40,X]")
    symbols    : dict — label/constant table built during pass 1
    pc         : int  — address of this instruction (needed for REL calc)

    Raises ValueError for malformed operands, unresolved symbols and
    out-of-range branch targets.  This function does not check that the
    returned mode exists for *mnemonic* in OPCODES.

    Note: an un-prefixed operand whose value is 0..$FF is encoded as
    zero-page when the mnemonic has such a form.  pass1() falls back to a
    size guess when this function raises, so a symbol that is unknown in
    pass 1 but resolves to a zero-page value later can be sized
    differently between passes; prefix such operands with % to avoid it.
    """
    # ---- No operand -> Implied or Accumulator ----
    if not operand_str:
        if mnemonic in ACC_MNEMONICS and (mnemonic, ACC) in OPCODES:
            return ACC, []
        return IMP, []

    raw = operand_str.strip()

    # ---- Accumulator: "A" ----
    if raw.upper() == "A":
        return ACC, []

    # ---- ZPR (BBR/BBS): "zp_addr, branch_target" ----
    if mnemonic in ZPR_MNEMONICS:
        fields = [p.strip() for p in raw.split(",")]
        if len(fields) != 2:
            raise ValueError(f"{mnemonic} requires zp_addr, branch_target")
        zp_val = parse_value(fields[0], symbols, pc) & 0xFF
        target = parse_value(fields[1], symbols, pc)
        # Offset is relative to the byte AFTER the 3-byte instruction.
        return ZPR, [zp_val, _relative_offset(mnemonic, target, pc + 3)]

    # ---- Branch (relative) ----
    if mnemonic in BRANCH_MNEMONICS:
        target = parse_value(raw, symbols, pc)
        # Offset is relative to the byte AFTER the 2-byte instruction.
        return REL, [_relative_offset(mnemonic, target, pc + 2)]

    # ---- Immediate: #value ----
    if raw.startswith("#"):
        val = parse_value(raw[1:], symbols, pc)
        return IMM, [val & 0xFF]

    # ---- Indirect: [value] / [value,X] / [value],Y ----
    if raw.startswith("["):
        inside, closer, tail = raw[1:].partition("]")
        if closer:
            inside = inside.strip()
            # Ignore whitespace in the suffix so "[zp], Y" is accepted;
            # anything other than exactly ",Y" or nothing is malformed.
            tail = "".join(tail.split()).upper()

            if tail == ",Y":
                # (zp),Y — post-indexed indirect
                val = parse_value(inside, symbols, pc)
                return IDY, [val & 0xFF]

            if tail == "":
                if "," in inside:
                    # [value,X] or [value,Y]
                    expr, idx_reg = _split_indexed(inside)
                    val = parse_value(expr, symbols, pc)
                    if idx_reg == "X":
                        if mnemonic == "JMP":
                            return AIX, [val & 0xFF, (val >> 8) & 0xFF]
                        return IDX, [val & 0xFF]
                    if idx_reg == "Y":
                        return IDY, [val & 0xFF]
                    raise ValueError(
                        f"Invalid index register in indirect: '{raw}'"
                    )
                # [value] — plain indirect; only JMP takes a 16-bit pointer
                val = parse_value(inside, symbols, pc)
                if mnemonic == "JMP":
                    return IND, [val & 0xFF, (val >> 8) & 0xFF]
                return ZPI, [val & 0xFF]

        raise ValueError(f"Malformed indirect operand: '{raw}'")

    # ---- Direct: %value[,R] (forced zero-page) or value[,R] ----
    forced_zp = raw.startswith("%")
    expr, idx_reg = _split_indexed(raw[1:] if forced_zp else raw)
    val = parse_value(expr, symbols, pc)

    if idx_reg is None:
        zp_mode, abs_mode = ZP, ABS
    elif idx_reg == "X":
        zp_mode, abs_mode = ZPX, ABX
    elif idx_reg == "Y":
        zp_mode, abs_mode = ZPY, ABY
    else:
        raise ValueError(f"Invalid index register: '{idx_reg}'")

    if forced_zp:
        return zp_mode, [val & 0xFF]

    # Auto zero-page only for genuine zero-page addresses.  A negative
    # value is encoded as a wrapped 16-bit absolute address rather than
    # being silently truncated into page zero.
    if 0 <= val <= 0xFF and (mnemonic, zp_mode) in OPCODES:
        return zp_mode, [val & 0xFF]
    return abs_mode, [val & 0xFF, (val >> 8) & 0xFF]


#
# ===================================================================
# Line parser — break a source line into its components
# ===================================================================

def _find_comment_start(text):
    """
    Return the index of the first ';' in *text* that is not inside a
    quoted string, or -1 if there is none.

    Inside quotes a backslash protects the following character, so an
    escaped quote does not end the string.
    """
    in_quote = None  # None, '"', or "'"
    i = 0
    while i < len(text):
        ch = text[i]
        if in_quote is None:
            if ch in ('"', "'"):
                in_quote = ch
            elif ch == ";":
                return i
        elif ch == "\\" and i + 1 < len(text):
            i += 2  # skip the backslash AND the escaped character
            continue
        elif ch == in_quote:
            in_quote = None
        i += 1
    return -1


def parse_line(line):
    """
    Parse a single source line into:
        (label, mnemonic, operand_str, comment)

    * label       — first word of a line that starts in column 1, with an
                    optional trailing colon removed; otherwise None.
    * mnemonic    — upper-cased instruction or directive, or None.
    * operand_str — remaining text, stripped ("" if the mnemonic has no
                    operand, None if there is no mnemonic).
    * comment     — text from the first unquoted ';' to end of line
                    (including the ';'), or "".
    """
    raw = line.rstrip("\n\r")

    # --- Separate the comment (first ';' NOT inside a quoted string) ---
    comment = ""
    semi = _find_comment_start(raw)
    if semi >= 0:
        comment = raw[semi:]
        raw = raw[:semi]

    # --- If nothing is left, it was a comment-only or blank line ---
    stripped = raw.rstrip()
    if not stripped:
        return (None, None, None, comment)

    # --- Detect label: starts in column 1 (no leading whitespace) ---
    label = None
    if stripped[0] not in (" ", "\t"):
        words = stripped.split(None, 1)
        label = words[0]
        if label.endswith(":"):
            label = label[:-1]
        stripped = words[1] if len(words) > 1 else ""

    stripped = stripped.strip()

    # --- The remainder is MNEMONIC [OPERAND] ---
    if not stripped:
        return (label, None, None, comment)

    words = stripped.split(None, 1)
    mnemonic = words[0].upper()
    operand_str = words[1].strip() if len(words) > 1 else ""
    return (label, mnemonic, operand_str, comment)


# ===================================================================
# Local label support
# ===================================================================

def resolve_local_label(label, current_scope):
    """
    Return the symbol-table key for *label*.

    A label starting with '.' or '@' is local: its key is the current
    scope (most recent non-local label), a '.', and the upper-cased label
    including its own prefix character,
        e.g. scope="MAIN", label=".loop"  ->  "MAIN..LOOP"
             scope="MAIN", label="@retry" ->  "MAIN.@RETRY"
    Non-local labels are returned upper-cased.
    """
    if label and (label[0] in (".", "@")):
        return f"{current_scope}.{label.upper()}"
    return label.upper()


# ===================================================================
# Directive handlers
# ===================================================================

def is_directive(mnemonic):
    """Return True if *mnemonic* is an assembler directive."""
    return mnemonic in DIRECTIVES


def _checked_count(directive, count):
    """
    Return *count* unchanged, or raise ValueError if it is negative.

    A negative count would move the program counter backwards while
    emitting no bytes.
    """
    if count < 0:
        raise ValueError(
            f"{directive} count must not be negative (got {count})"
        )
    return count


def handle_directive_pass2(mnemonic, operand_str, symbols, pc, label):
    """
    Process a directive during pass 2 and return emitted bytes.

    Parameters
    ----------
    mnemonic    : str      — upper-cased directive name (e.g. ".BYTE")
    operand_str : str      — raw operand text
    symbols     : dict     — symbol table; updated in place by .EQU
    pc          : int      — program counter at this directive
    label       : str|None — label on the same line (used by .EQU only)

    Returns (new_pc, bytes_list):
        new_pc     — updated program counter after directive
        bytes_list — list of bytes emitted (may be empty)

    Raises ValueError for unknown directives, unresolved expressions,
    malformed strings, non-ASCII text and negative .ds/.fill counts.
    """
    if mnemonic == ".ORG":
        return parse_value(operand_str.strip(), symbols, pc), []

    if mnemonic == ".EQU":
        val = parse_value(operand_str.strip(), symbols, pc)
        if label:
            # NOTE(review): the key is label.upper() with no local-label
            # scoping, whereas pass1() stores local labels under their
            # scoped key.  Confirm whether the caller passes an already
            # resolved name for local labels.
            symbols[label.upper()] = val
        return pc, []

    if mnemonic == ".BYTE":
        data = _byte_items_to_data(_parse_byte_items(operand_str), symbols, pc)
        return pc + len(data), data

    if mnemonic == ".WORD":
        data = []
        for field in operand_str.split(","):
            val = parse_value(field.strip(), symbols, pc)
            data.append(val & 0xFF)          # little-endian: low byte first
            data.append((val >> 8) & 0xFF)
        return pc + len(data), data

    if mnemonic == ".TEXT":
        data = list(_extract_string(operand_str).encode("ascii"))
        return pc + len(data), data

    if mnemonic == ".ASCIIZ":
        # Same as .text but with a $00 terminator appended
        data = list(_extract_string(operand_str).encode("ascii")) + [0x00]
        return pc + len(data), data

    if mnemonic == ".DS":
        count = _checked_count(
            ".ds", parse_value(operand_str.strip(), symbols, pc))
        return pc + count, [0x00] * count

    if mnemonic == ".FILL":
        # .fill count[,value] — value defaults to $00
        fields = [p.strip() for p in operand_str.split(",")]
        count = _checked_count(".fill", parse_value(fields[0], symbols, pc))
        fill_val = 0x00
        if len(fields) > 1:
            fill_val = parse_value(fields[1], symbols, pc) & 0xFF
        return pc + count, [fill_val] * count

    if mnemonic == ".INCLUDE":
        # Already expanded during file loading — no-op here
        return pc, []

    raise ValueError(f"Unknown directive: {mnemonic}")


# Escape sequences recognised inside quoted strings, keyed by the
# character that follows the backslash.
_STRING_ESCAPES = {
    "n": "\n",
    "t": "\t",
    "0": "\x00",
    "\\": "\\",
    '"': '"',
    "'": "'",
}


def _unescape_string(text):
    """
    Decode backslash escapes in *text* in one left-to-right pass.

    A single pass is required for correctness: chained str.replace() calls
    turn an escaped backslash followed by 'n' into backslash + newline.
    Unrecognised escapes are left untouched (backslash and character kept).
    """
    out = []
    i = 0
    while i < len(text):
        ch = text[i]
        if ch == "\\" and i + 1 < len(text) and text[i + 1] in _STRING_ESCAPES:
            out.append(_STRING_ESCAPES[text[i + 1]])
            i += 2
        else:
            out.append(ch)
            i += 1
    return "".join(out)


def _extract_string(operand_str):
    """
    Extract a quoted string from an operand.

    Accepts both "double" and 'single' quotes.  Supports the escape
    sequences \\n, \\t, \\0, \\\\, \\" and \\'.

    Returns the decoded text.  Raises ValueError if the operand is not a
    string that starts and ends with the same quote character.
    """
    s = operand_str.strip()
    if len(s) < 2:
        raise ValueError(f"Invalid string literal: {operand_str}")
    quote = s[0]
    if quote not in ('"', "'"):
        raise ValueError(f"String must start with a quote: {operand_str}")
    if not s.endswith(quote):
        raise ValueError(f"Unterminated string: {operand_str}")
    return _unescape_string(s[1:-1])


def _scan_quoted(s, start):
    """
    Given that s[start] is an opening quote, return the index one past the
    matching closing quote, or -1 if the string is unterminated.  A
    backslash protects the character that follows it.
    """
    quote = s[start]
    i = start + 1
    while i < len(s):
        if s[i] == "\\" and i + 1 < len(s):
            i += 2
            continue
        if s[i] == quote:
            return i + 1
        i += 1
    return -1


def _is_quoted_string(item):
    """
    Return True if *item* is exactly one quoted string: it opens with a
    quote whose matching close is the item's last character.  An
    expression such as 'A'+'B' starts and ends with a quote but is not a
    single string.
    """
    return (
        len(item) >= 2
        and item[0] in ('"', "'")
        and _scan_quoted(item, 0) == len(item)
    )


def _parse_byte_items(operand_str):
    """
    Tokenise a .byte operand string into individual items, respecting
    quoted strings (single or double).

    Each item is either a quoted-string token (still in quotes) or a
    numeric/symbol expression.  A quoted literal that is directly preceded
    or followed by an operator (+, -, *) is kept inside the surrounding
    expression, so a character literal can take part in arithmetic.

    Examples:
        '$41,$42,$43'        ->  ['$41', '$42', '$43']
        '"Hello",$0D,$0A'    ->  ['"Hello"', '$0D', '$0A']
        '"Hi, World",$00'    ->  ['"Hi, World"', '$00']
        "'AB',$FF"           ->  ["'AB'", '$FF']
        '"semi;colon",$00'   ->  ['"semi;colon"', '$00']
        "';'+1,$00"          ->  ["';'+1", '$00']

    Raises ValueError on unterminated strings.
    """
    items = []
    current = ""
    s = operand_str.strip()
    i = 0
    while i < len(s):
        ch = s[i]

        if ch in ('"', "'"):
            end = _scan_quoted(s, i)
            if end < 0:
                raise ValueError(f"Unterminated string in .byte: {operand_str}")
            token = s[i:end]
            i = end

            # Is the literal an operand of an expression?
            before = current.strip()
            j = i
            while j < len(s) and s[j] in " \t":
                j += 1
            after_op = j < len(s) and s[j] in "+-*"
            before_op = bool(before) and before[-1] in "+-*<>"

            if before_op or after_op:
                current += token
            else:
                # Stand-alone string: flush any pending text, then emit it.
                if before:
                    items.append(before)
                current = ""
                items.append(token)

        elif ch == ",":
            if current.strip():
                items.append(current.strip())
            current = ""
            i += 1

        else:
            current += ch
            i += 1

    if current.strip():
        items.append(current.strip())
    return items


def _byte_items_to_data(items, symbols, pc=None):
    """
    Convert the tokenised items from _parse_byte_items into a flat list
    of byte values.

    Stand-alone quoted strings are converted to their ASCII bytes.
    Everything else is evaluated as a numeric expression and reduced to
    its low 8 bits.  Raises ValueError (including for non-ASCII text).
    """
    data = []
    for item in items:
        if _is_quoted_string(item):
            data.extend(_extract_string(item).encode("ascii"))
        else:
            data.append(parse_value(item, symbols, pc) & 0xFF)
    return data


def _count_byte_items(items):
    """
    Count the number of bytes that a list of .byte items will emit.

    Used in pass 1, where symbol values are not needed, only the size.
    Stand-alone quoted strings contribute their length after escape
    processing; every other item contributes one byte.
    """
    total = 0
    for item in items:
        if _is_quoted_string(item):
            total += len(_extract_string(item).encode("ascii"))
        else:
            total += 1
    return total


# ===================================================================
# File loader with .include support
# ===================================================================

def load_source(src_path, include_stack=None):
    """
    Load a source file, recursively expanding any .include directives.

    Returns a list of (original_line_text, source_file, file_line_number)
    tuples.  The .include directive line itself is kept for the listing,
    and the included file's lines are inserted right after it.  Included
    paths are resolved relative to the directory of the including file.

    *include_stack* is the chain of absolute paths currently being loaded;
    it is used to detect circular references and is not modified.

    Raises FileNotFoundError if a file cannot be opened, and ValueError
    for a circular include or a malformed .include operand.
    """
    abs_path = os.path.abspath(src_path)
    chain = list(include_stack) if include_stack else []
    if abs_path in chain:
        raise ValueError(
            f"Circular .include detected: {src_path}\n"
            f"  Include chain: {' -> '.join(chain)}"
        )
    # Work on a private copy so the caller's list is left untouched.
    chain.append(abs_path)

    try:
        with open(src_path, "r") as f:
            raw_lines = f.readlines()
    except FileNotFoundError:
        raise FileNotFoundError(
            f"Source file not found: {src_path}") from None

    # Base directory for resolving relative .include paths
    base_dir = os.path.dirname(abs_path) or "."

    result = []
    for file_line_num, line in enumerate(raw_lines, start=1):
        result.append((line, src_path, file_line_num))

        _label, mnemonic, operand_str, _comment = parse_line(line)
        if mnemonic != ".INCLUDE":
            continue

        try:
            inc_file = _extract_string(operand_str)
        except ValueError as err:
            # Add the location; the bare message does not say where.
            raise ValueError(
                f"{src_path}:{file_line_num}: bad .include operand: {err}"
            ) from err
        result.extend(load_source(os.path.join(base_dir, inc_file), chain))

    return result


# ===================================================================
# Instruction-size guesser (pass 1 fallback for forward references)
# ===================================================================

def guess_instruction_size(mnemonic, operand_str):
    """
    When pass 1 cannot resolve a forward reference, guess the instruction
    size in bytes from the operand syntax.  This is a heuristic fallback.

    Un-prefixed operands are assumed absolute (3 bytes) unless the
    mnemonic has a zero-page opcode for that operand shape and no absolute
    one (e.g. RMBn/SMBn, STY n,X, STX n,Y); then only the 2-byte encoding
    can exist.
    """
    if not operand_str:
        return 1  # Implied or accumulator

    raw = operand_str.strip()
    if raw.upper() == "A":
        return 1  # Accumulator
    if raw.startswith(("#", "%")):
        return 2  # Immediate / forced zero-page
    if raw.startswith("["):
        # JMP indirect carries a 16-bit pointer; every other indirect
        # form uses a zero-page pointer.
        return 3 if mnemonic == "JMP" else 2

    index = raw.split(",")[1].strip().upper() if "," in raw else ""
    modes = {"": (ZP, ABS), "X": (ZPX, ABX), "Y": (ZPY, ABY)}.get(index)
    if modes is not None:
        zp_mode, abs_mode = modes
        if ((mnemonic, zp_mode) in OPCODES
                and (mnemonic, abs_mode) not in OPCODES):
            return 2

    # Default: assume absolute (3 bytes) for unresolved references
    return 3


# ===================================================================
# Pass 1 — collect labels and determine addresses
# ===================================================================

def _pass1_data_size(mnemonic, operand_str, symbols, pc):
    """
    Return the number of bytes a data directive (.BYTE, .WORD, .TEXT,
    .ASCIIZ, .DS, .FILL) occupies.  Raises ValueError if the operand
    cannot be sized.
    """
    if mnemonic == ".BYTE":
        return _count_byte_items(_parse_byte_items(operand_str))
    if mnemonic == ".WORD":
        return len(operand_str.split(",")) * 2
    if mnemonic == ".TEXT":
        return len(_extract_string(operand_str).encode("ascii"))
    if mnemonic == ".ASCIIZ":
        return len(_extract_string(operand_str).encode("ascii")) + 1  # +$00
    if mnemonic == ".DS":
        return _checked_count(
            ".ds", parse_value(operand_str.strip(), symbols, pc))
    if mnemonic == ".FILL":
        first = operand_str.split(",")[0].strip()
        return _checked_count(".fill", parse_value(first, symbols, pc))
    return 0


def pass1(lines):
    """
    First pass: scan all lines, record label addresses, and compute the
    size of each instruction so we know addresses for pass 2.

    *lines* is a list of (line_text, source_file, file_line_number) tuples
    as returned by load_source().

    Returns a 4-tuple:
        symbols       : dict — {KEY: address or .equ value}; keys are
                               upper-case, local labels use the scoped key
                               from resolve_local_label()
        sym_def_lines : dict — {KEY: 1-based listing line of definition}
        origin        : int  — value of the first .org (0 if none)
        errors        : list — error/warning messages from pass 1

    A .equ whose expression cannot be evaluated yet is deferred.  Its
    label is removed from the symbol table until resolved, so the
    placeholder address is never used as its value.  Deferred definitions
    are retried after the scan until no further progress is made, so they
    may depend on one another in any order; '*' in a deferred expression
    is the program counter at the .equ line.
    """
    symbols = {}         # key -> address (or .equ value)
    sym_def_lines = {}   # key -> listing line number where defined
    pc = 0x0000          # default origin
    origin = None        # set on first .org
    errors = []
    current_scope = ""   # most recent non-local label
    deferred_equs = []   # (key, expr, src_file, file_line, def_line, def_pc)

    for listing_line, (line_text, src_file, file_line) in enumerate(
            lines, start=1):
        label, mnemonic, operand_str, _comment = parse_line(line_text)
        where = f"{src_file}:{file_line}"

        # --- Handle label ---
        resolved = None
        if label:
            if label[0] in (".", "@"):
                resolved = resolve_local_label(label, current_scope)
            else:
                # Non-local label becomes the new scope
                current_scope = label.upper()
                resolved = current_scope

            if resolved in symbols:
                errors.append(
                    f"{where}: WARNING - "
                    f"Duplicate label '{label}' (overwritten)"
                )
            symbols[resolved] = pc
            sym_def_lines[resolved] = listing_line

        if not mnemonic:
            continue

        # --- Handle directives ---
        if is_directive(mnemonic):
            if mnemonic == ".ORG":
                try:
                    val = parse_value(operand_str.strip(), symbols, pc)
                except ValueError as e:
                    errors.append(f"{where}: {e}")
                else:
                    pc = val
                    if origin is None:
                        origin = val
                    # A label on the .org line refers to the NEW address
                    if resolved is not None:
                        symbols[resolved] = pc
                        sym_def_lines[resolved] = listing_line

            elif mnemonic == ".EQU":
                try:
                    val = parse_value(operand_str.strip(), symbols, pc)
                except ValueError:
                    # Forward reference — defer to after the scan
                    if resolved is not None:
                        # Drop the placeholder address so later references
                        # are seen as unresolved, not as a wrong value.
                        symbols.pop(resolved, None)
                        deferred_equs.append(
                            (resolved, operand_str.strip(),
                             src_file, file_line, listing_line, pc)
                        )
                else:
                    if resolved is not None:
                        symbols[resolved] = val
                        sym_def_lines[resolved] = listing_line

            elif mnemonic != ".INCLUDE":  # .include: expanded by load_source()
                try:
                    pc += _pass1_data_size(mnemonic, operand_str, symbols, pc)
                except ValueError as e:
                    errors.append(f"{where}: {e}")

            continue

        # --- Determine instruction size ---
        if mnemonic in ZPR_MNEMONICS:
            pc += 3
        elif mnemonic in BRANCH_MNEMONICS:
            pc += 2
        else:
            try:
                mode, _vals = parse_operand(
                    mnemonic, operand_str, symbols, pc)
            except ValueError:
                pc += guess_instruction_size(mnemonic, operand_str)
            else:
                pc += MODE_SIZE.get(mode, 1)

    # --- Resolve deferred .equ definitions (forward references) ---
    pending = deferred_equs
    while pending:
        failed = []
        for entry in pending:
            key, expr_str, _src, _line, def_line, def_pc = entry
            try:
                val = parse_value(expr_str, symbols, def_pc)
            except ValueError as e:
                failed.append((entry, e))
                continue
            symbols[key] = val
            sym_def_lines[key] = def_line

        if len(failed) == len(pending):
            # No progress in this sweep: the remainder cannot be resolved.
            for (key, expr_str, src_file, file_line, _d, _p), e in failed:
                errors.append(
                    f"{src_file}:{file_line}: Cannot resolve .equ "
                    f"'{key}' = '{expr_str}': {e}"
                )
            break
        pending = [entry for entry, _e in failed]

    if origin is None:
        origin = 0x0000

    return symbols, sym_def_lines, origin, errors


#
=================================================================== # Pass 2 — generate machine code # =================================================================== def pass2(lines, symbols, origin): """ Second pass: generate machine code bytes and build listing data. *lines* is a list of (line_text, source_file, file_line_number) tuples. Returns: listing : list of tuples for writing the listing file code : bytearray — assembled binary start_addr : int — address of first byte end_addr : int — address one past the last byte errors : list of error messages """ pc = origin start_addr = origin end_addr = origin errors = [] current_scope = "" # for local label scoping sym_ref_lines = {} # label -> sorted list of listing line numbers # Accumulate code keyed by address (supports .org gaps) code_map = {} # Listing: one entry per source line # Each entry: (seq_num, addr_or_None, bytes_list, source_line_text) listing = [] for seq_num, (line_text, src_file, file_line) in enumerate(lines): label, mnemonic, operand_str, comment = parse_line(line_text) source_text = line_text.rstrip("\n\r") display_line = seq_num + 1 # sequential line number in listing # --- Track scope for local labels --- if label and label[0] not in (".", "@"): current_scope = label.upper() # --- Lines with no instruction -> listing-only --- if not mnemonic: listing.append((display_line, None, [], source_text)) continue # --- Directives --- if is_directive(mnemonic): # Track any symbol references in the directive operand for sym in _symbols_in_operand(operand_str, symbols): sym_ref_lines.setdefault(sym, []).append(display_line) if mnemonic == ".ORG": try: val = parse_value(operand_str.strip(), symbols, pc) pc = val except ValueError as e: errors.append(f"{src_file}:{file_line}: {e}") listing.append((display_line, None, [], source_text)) elif mnemonic == ".EQU": # Already resolved in pass 1 / deferred resolution listing.append((display_line, None, [], source_text)) elif mnemonic == ".INCLUDE": 
listing.append((display_line, None, [], source_text)) elif mnemonic in ( ".BYTE", ".WORD", ".TEXT", ".ASCIIZ", ".DS", ".FILL" ): try: new_pc, data = handle_directive_pass2( mnemonic, operand_str, symbols, pc, label ) for i, b in enumerate(data): code_map[pc + i] = b listing.append((display_line, pc, data, source_text)) pc = new_pc if pc > end_addr: end_addr = pc except ValueError as e: errors.append(f"{src_file}:{file_line}: {e}") listing.append((display_line, pc, [], source_text)) continue # --- Regular instruction --- # Resolve local label references in operand resolved_operand = _resolve_operand_locals( operand_str, symbols, current_scope ) try: mode, operand_bytes = parse_operand( mnemonic, resolved_operand, symbols, pc ) except ValueError as e: errors.append(f"{src_file}:{file_line}: {e}") listing.append((display_line, pc, [], source_text)) continue # Look up opcode key = (mnemonic, mode) if key not in OPCODES: errors.append( f"{src_file}:{file_line}: Invalid addressing mode " f"{mode} for {mnemonic}" ) listing.append((display_line, pc, [], source_text)) continue opcode = OPCODES[key] inst_bytes = [opcode] + operand_bytes inst_size = len(inst_bytes) # Sanity-check size expected = MODE_SIZE[mode] if inst_size != expected: errors.append( f"{src_file}:{file_line}: Internal size mismatch for " f"{mnemonic} {mode} - got {inst_size}, expected {expected}" ) # Store bytes for i, b in enumerate(inst_bytes): code_map[pc + i] = b # --- Track symbol references --- for sym in _symbols_in_operand(operand_str, symbols): sym_ref_lines.setdefault(sym, []).append(display_line) listing.append((display_line, pc, inst_bytes, source_text)) pc += inst_size if pc > end_addr: end_addr = pc # --- Flatten code_map into a contiguous bytearray --- if not code_map: return listing, bytearray(), start_addr, start_addr, errors min_addr = min(code_map.keys()) max_addr = max(code_map.keys()) size = max_addr - min_addr + 1 code = bytearray(size) for addr, byte_val in code_map.items(): code[addr 
- min_addr] = byte_val return listing, code, min_addr, max_addr + 1, errors, sym_ref_lines def _symbols_in_operand(operand_str, symbols): """ Return a set of symbol names (uppercase) that appear in *operand_str* and are actually present in the *symbols* table. Used by pass 2 to record which symbols are referenced on each line. Numeric literals ($, 0x, !, &) and register names (A/X/Y) are silently ignored. """ if not operand_str: return set() raw = operand_str.strip() # Strip mode-prefix characters to get at the bare expression(s) # Remove leading # % [ ] and trailing ],Y / ,X / ,Y fragments cleaned = re.sub(r'[\[\]#%]', ' ', raw) cleaned = re.sub(r',\s*[XYxy]\b', ' ', cleaned) cleaned = re.sub(r'\]\s*,\s*[Yy]\b', ' ', cleaned) # Also handle < > unary prefixes — strip them cleaned = re.sub(r'[<>]', ' ', cleaned) found = set() # Split on expression operators and whitespace to get individual atoms atoms = re.split(r'[\s+\-*,]+', cleaned) for atom in atoms: atom = atom.strip() if not atom: continue # Skip numeric literals if atom.startswith('$') or atom.startswith('!') or atom.startswith('&'): continue if atom.lower().startswith('0x'): continue # Skip register names used standalone if atom.upper() in ('A', 'X', 'Y'): continue upper = atom.upper() if upper in symbols: found.add(upper) return found def _resolve_operand_locals(operand_str, symbols, current_scope): """ If the operand references a local label (starts with . or @), check whether a scope-qualified version exists in *symbols* and replace it with the numeric value so parse_value can find it. """ if not operand_str: return operand_str # Quick check — does the operand contain a local-label character? if "." not in operand_str and "@" not in operand_str: return operand_str def replacer(match): """Replace a local label token with its resolved hex value.""" token = match.group(0) qualified = f"{current_scope}.{token.upper()}" if qualified in symbols: return f"${symbols[qualified]:X}" return token # Match . 
or @ followed by word characters (label body) result = re.sub(r'[.@]\w+', replacer, operand_str) return result # =================================================================== # Output: human-readable listing (.txt) # =================================================================== def write_listing(listing, symbols, sym_def_lines, sym_ref_lines, out_path, origin, code_size, start_addr, end_addr, error_count, checksum, pass1_errors, pass2_errors): """ Write a formatted listing file matching the Example_Printout format, plus a symbol table appendix and assembly summary. Format of each line (fixed columns): Cols 1-16 : Address + hex bytes (left-justified, space-padded) Cols 17-21 : Line number (5 digits, zero-padded) Cols 22-23 : Two spaces Cols 24+ : Original source text """ MAX_HEX_PER_LINE = 3 # max operand bytes shown on one listing line with open(out_path, "w") as f: # --- Listing body --- for (line_num, addr, data_bytes, source_text) in listing: if addr is not None and data_bytes: # First line: up to MAX_HEX_PER_LINE bytes chunk = data_bytes[:MAX_HEX_PER_LINE] addr_str = f"{addr:04X}" hex_str = " ".join(f"{b:02X}" for b in chunk) left = f"{addr_str} {hex_str}" left = f"{left:<16s}" f.write(f"{left}{line_num:05d} {source_text}\n") # Continuation lines for remaining bytes offset = MAX_HEX_PER_LINE while offset < len(data_bytes): chunk = data_bytes[offset:offset + MAX_HEX_PER_LINE] cont_addr = addr + offset cont_hex = " ".join(f"{b:02X}" for b in chunk) cont_left = f"{cont_addr:04X} {cont_hex}" cont_left = f"{cont_left:<16s}" f.write(f"{cont_left} \n") offset += MAX_HEX_PER_LINE else: left = f"{'':<16s}" f.write(f"{left}{line_num:05d} {source_text}\n") # --- Symbol table appendix --- f.write("\n") f.write("=" * 60 + "\n") f.write(" SYMBOL TABLE\n") f.write("=" * 60 + "\n") if symbols: for name in sorted(symbols.keys()): val = symbols[name] def_line = sym_def_lines.get(name) refs = sym_ref_lines.get(name, []) def_str = f" Defined:{def_line:05d}" if def_line 
is not None else " Defined:-----" if refs: refs_str = " references:" + ",".join(f"{r:05d}" for r in refs) else: refs_str = " references:-" f.write(f" {name:<24s} = ${val:04X} {def_str} {refs_str}\n") else: f.write(" (no symbols defined)\n") # --- Error list --- all_errors = ( [f"[P1] {e}" for e in pass1_errors] + [f"[P2] {e}" for e in pass2_errors] ) f.write("\n") f.write("=" * 60 + "\n") f.write(" ERROR LIST\n") f.write("=" * 60 + "\n") if all_errors: for err in all_errors: f.write(f" {err}\n") else: f.write(" (no errors)\n") # --- Assembly summary --- f.write("\n") f.write("-" * 60 + "\n") f.write(f" Origin : ${origin:04X}\n") if code_size > 0: f.write( f" Range : ${start_addr:04X} - ${end_addr - 1:04X}\n") f.write(f" Size : {code_size} bytes\n") f.write(f" CRC-16 : ${checksum:04X}\n") f.write(f" Errors : {error_count}\n") f.write("-" * 60 + "\n") # =================================================================== # Output: binary file (.bin) # =================================================================== def write_binary(code, out_path): """Write the assembled machine code as a raw binary file.""" with open(out_path, "wb") as f: f.write(code) # =================================================================== # Main entry point # =================================================================== def main(): """ Main assembler driver. Reads a source file (with .include expansion), performs two-pass assembly, and writes the binary and listing output files. 
""" # --- Print banner --- print(f"65C02 Assembler v{VERSION}") print(f"Target: WDC W65C02S (full instruction set)") print() # --- Check command-line arguments --- if len(sys.argv) < 2: print("Usage: python3 asm65c02.py ") sys.exit(1) src_path = sys.argv[1] # --- Derive output filenames --- base, _ext = os.path.splitext(src_path) bin_path = base + ".bin" txt_path = base + ".txt" # --- Load source file (with .include expansion) --- try: lines = load_source(src_path) except (FileNotFoundError, ValueError) as e: print(f"ERROR: {e}") sys.exit(1) total_lines = len(lines) print(f"Source : {src_path} ({total_lines} lines after include expansion)") # --- Pass 1: collect labels and compute addresses --- print("Pass 1 : Collecting labels and addresses...") symbols, sym_def_lines, origin, pass1_errors = pass1(lines) if pass1_errors: for e in pass1_errors: print(f" [P1] {e}") if symbols: print(f" {len(symbols)} symbol(s) defined") for name, val in sorted(symbols.items()): print(f" {name:<24s} = ${val:04X}") # --- Pass 2: generate machine code --- print("Pass 2 : Generating machine code...") listing, code, start_addr, end_addr, pass2_errors, sym_ref_lines = pass2( lines, symbols, origin ) if pass2_errors: for e in pass2_errors: print(f" [P2] {e}") total_errors = len(pass1_errors) + len(pass2_errors) # --- Write outputs --- write_binary(code, bin_path) # --- Compute CRC-16 checksum of binary output --- checksum = 0 for byte in code: checksum = (checksum + byte) & 0xFFFF print(f"Binary : {bin_path} " f"({len(code)} bytes, ${start_addr:04X}-${end_addr - 1:04X})") write_listing( listing, symbols, sym_def_lines, sym_ref_lines, txt_path, origin, len(code), start_addr, end_addr, total_errors, checksum, pass1_errors, pass2_errors ) print(f"Listing: {txt_path}") # --- Summary --- print() if total_errors > 0: print(f"Assembly complete with {total_errors} error(s).") else: print("Assembly complete - no errors.") print(f"CRC-16 : ${checksum:04X}") # --- Run --- if __name__ == "__main__": 
main()